Estep <- function(data, G, para){
# Your Code
# Return the n-by-G probability matrix
n = dim(data)[1]
z = outer(1:n, 1:G,FUN = Vectorize(function(i, j)
-(1/2) * as.matrix(data[i,] - para$mean[,j]) %*% solve(para$Sigma) %*% t(as.matrix(data[i,] - para$mean[,j])) + log(para$prob[j]) )
)
# Subtracting max value of z to handle underflow problem
z_max = apply(z, 1, max)
z = exp(z - z_max) / rowSums(exp(z - z_max))
z
}
Mstep <- function(data, G, para, post.prob){
# Your Code
# Return the updated parameters
n = dim(data)[1]
d = dim(data)[2]
para$prob = colSums(post.prob) / n
para$mean = outer(1:d, 1:G,FUN = Vectorize(function(i, j)
sum(as.matrix(post.prob[,j]*data[,i])) / sum(as.matrix(post.prob[,j]))
) )
para$Sigma = matrix(0,d,d)
for (g in 1:G){
for (i in 1:n){
para$Sigma = para$Sigma + (t(data[i,]) - para$mean[,g]) %*% t(t(data[i,]) - para$mean[,g]) * post.prob[i,g]
}
}
para$Sigma = para$Sigma / n
para
}
myEM <- function(data, itmax, G, para){
# itmax: num of iterations
# G: num of components
# para: list of parameters (prob, mean, Sigma)
for(t in 1:itmax){
post.prob <- Estep(data, G, para)
para <- Mstep(data, G, para, post.prob)
}
return(para)
}
options(digits=8)
options()$digits
## [1] 8
library(mclust)
## Package 'mclust' version 5.4.7
## Type 'citation("mclust")' for citing this R package in publications.
dim(faithful)
## [1] 272 2
head(faithful)
## eruptions waiting
## 1 3.600 79
## 2 1.800 54
## 3 3.333 74
## 4 2.283 62
## 5 4.533 85
## 6 2.883 55
n = nrow(faithful)
K <- 2
set.seed(2064)
gID <- sample(1:K, n, replace = TRUE)
Z <- matrix(0, n, K)
for(k in 1:K)
Z[gID == k, k] <- 1
ini0 <- mstep(modelName="EEE", faithful , Z)$parameters
para0 <- list(prob = ini0$pro,
mean = ini0$mean,
Sigma = ini0$variance$Sigma)
para0
## $prob
## [1] 0.5 0.5
##
## $mean
## [,1] [,2]
## eruptions 3.5146544 3.4609118
## waiting 71.2573529 70.5367647
##
## $Sigma
## eruptions waiting
## eruptions 1.2972168 13.916737
## waiting 13.9167373 184.014003
myEM(data=faithful, itmax=20, G=K, para=para0)
## $prob
## [1] 0.50002545 0.49997455
##
## $mean
## [,1] [,2]
## [1,] 3.5148192 3.4607442
## [2,] 71.2592307 70.5348501
##
## $Sigma
## eruptions waiting
## eruptions 1.2972079 13.916626
## waiting 13.9166261 184.012633
Rout <- em(modelName = "EEE", data = faithful,
control = emControl(eps=0, tol=0, itmax = 20),
parameters = ini0)$parameters
list(Rout$pro, Rout$mean, Rout$variance$Sigma)
## [[1]]
## [1] 0.50002545 0.49997455
##
## [[2]]
## [,1] [,2]
## eruptions 3.5148192 3.4607442
## waiting 71.2592307 70.5348501
##
## [[3]]
## eruptions waiting
## eruptions 1.2972079 13.916626
## waiting 13.9166261 184.012633
K <- 3
gID <- sample(1:K, n, replace = TRUE)
Z <- matrix(0, n, K)
for(k in 1:K)
Z[gID == k, k] <- 1
ini0 <- mstep(modelName="EEE", faithful , Z)$parameters
para0 <- list(prob = ini0$pro,
mean = ini0$mean,
Sigma = ini0$variance$Sigma)
para0
## $prob
## [1] 0.30147059 0.34558824 0.35294118
##
## $mean
## [,1] [,2] [,3]
## eruptions 3.4613415 3.5320851 3.4669896
## waiting 70.5000000 70.3404255 71.7812500
##
## $Sigma
## eruptions waiting
## eruptions 1.2968972 13.938265
## waiting 13.9382649 183.713282
myEM(data=faithful, itmax=20, G=K, para=para0)
## $prob
## [1] 0.30166706 0.34746712 0.35086582
##
## $mean
## [,1] [,2] [,3]
## [1,] 3.4463918 3.5422278 3.4694531
## [2,] 69.9897649 70.4347856 72.1349264
##
## $Sigma
## eruptions waiting
## eruptions 1.2962742 13.931796
## waiting 13.9317964 183.283598
Rout <- em(modelName = "EEE", data = faithful,
control = emControl(eps=0, tol=0, itmax = 20),
parameters = ini0)$parameters
list(Rout$pro, Rout$mean, Rout$variance$Sigma)
## [[1]]
## [1] 0.30166706 0.34746712 0.35086582
##
## [[2]]
## [,1] [,2] [,3]
## eruptions 3.4463918 3.5422278 3.4694531
## waiting 69.9897649 70.4347856 72.1349264
##
## [[3]]
## eruptions waiting
## eruptions 1.2962742 13.931796
## waiting 13.9317964 183.283598
1 and 2
3 and 4
5
5